{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Bio import Entrez, Medline, SeqIO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Do not forget to inform NCBI of your email address (change below)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "Entrez.email = \"put@your_email.here\" "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'DbList': ['pubmed', 'protein', 'nuccore', 'ipg', 'nucleotide', 'nucgss', 'nucest', 'structure', 'sparcle', 'genome', 'annotinfo', 'assembly', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'clone', 'gap', 'gapplus', 'grasp', 'dbvar', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'probe', 'proteinclusters', 'pcassay', 'biosystems', 'pccompound', 'pcsubstance', 'pubmedhealth', 'seqannot', 'snp', 'sra', 'taxonomy', 'biocollections', 'unigene', 'gencoll', 'gtr']}\n"
     ]
    }
   ],
   "source": [
    "#This gives you the list of available databases\n",
    "handle = Entrez.einfo()\n",
    "rec = Entrez.read(handle)\n",
    "print(rec)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "handle = Entrez.esearch(db=\"nucleotide\", term='CRT[Gene Name] AND \"Plasmodium falciparum\"[Organism]')\n",
    "rec_list = Entrez.read(handle)\n",
    "if rec_list['RetMax'] < rec_list['Count']:\n",
    "    handle = Entrez.esearch(db=\"nucleotide\", term='CRT[Gene Name] AND \"Plasmodium falciparum\"[Organism]',\n",
    "                            retmax=rec_list['Count'])\n",
    "    rec_list = Entrez.read(handle)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "id_list = rec_list['IdList']\n",
    "hdl = Entrez.efetch(db='nucleotide', id=id_list, rettype='gb', retmax=rec_list['Count'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "recs = list(SeqIO.parse(hdl, 'gb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "KM288867\n",
      "Plasmodium falciparum clone PF3D7_0709000 chloroquine resistance transporter (CRT) gene, complete cds\n"
     ]
    }
   ],
   "source": [
    "for rec in recs:\n",
    "    if rec.name == 'KM288867':\n",
    "        break\n",
    "print(rec.name)\n",
    "print(rec.description)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "not processed:\n",
      "type: source\n",
      "location: [0:10000](+)\n",
      "qualifiers:\n",
      "    Key: clone, Value: ['PF3D7_0709000']\n",
      "    Key: db_xref, Value: ['taxon:5833']\n",
      "    Key: mol_type, Value: ['genomic DNA']\n",
      "    Key: organism, Value: ['Plasmodium falciparum']\n",
      "\n",
      "['CRT']\n",
      "not processed:\n",
      "type: mRNA\n",
      "location: join{[2751:3543](+), [3720:3989](+), [4168:4341](+), [4513:4646](+), [4799:4871](+), [4994:5070](+), [5166:5249](+), [5376:5427](+), [5564:5621](+), [5769:5862](+), [6055:6100](+), [6247:6302](+), [6471:7598](+)}\n",
      "qualifiers:\n",
      "    Key: gene, Value: ['CRT']\n",
      "    Key: product, Value: ['chloroquine resistance transporter']\n",
      "\n",
      "not processed:\n",
      "type: 5'UTR\n",
      "location: [2751:3452](+)\n",
      "qualifiers:\n",
      "    Key: gene, Value: ['CRT']\n",
      "\n",
      "not processed:\n",
      "type: primer_bind\n",
      "location: [2935:2958](+)\n",
      "qualifiers:\n",
      "\n",
      "not processed:\n",
      "type: primer_bind\n",
      "location: [3094:3121](+)\n",
      "qualifiers:\n",
      "\n",
      "not processed:\n",
      "type: CDS\n",
      "location: join{[3452:3543](+), [3720:3989](+), [4168:4341](+), [4513:4646](+), [4799:4871](+), [4994:5070](+), [5166:5249](+), [5376:5427](+), [5564:5621](+), [5769:5862](+), [6055:6100](+), [6247:6302](+), [6471:6548](+)}\n",
      "qualifiers:\n",
      "    Key: codon_start, Value: ['1']\n",
      "    Key: gene, Value: ['CRT']\n",
      "    Key: product, Value: ['chloroquine resistance transporter']\n",
      "    Key: protein_id, Value: ['AIW62921.1']\n",
      "    Key: translation, Value: ['MKFASKKNNQKNSSKNDERYRELDNLVQEGNGSRLGGGSCLGKCAHVFKLIFKEIKDNIFIYILSIIYLSVCVMNKIFAKRTLNKIGNYSFVTSETHNFICMIMFFIVYSLFGNKKGNSKERHRSFNLQFFAISMLDACSVILAFIGLTRTTGNIQSFVLQLSIPINMFFCFLILRYRYHLYNYLGAVIIVVTIALVEMKLSFETQEENSIIFNLVLISALIPVCFSNMTREIVFKKYKIDILRLNAMVSFFQLFTSCLILPVYTLPFLKQLHLPYNEIWTNIKNGFACLFLGRNTVVENCGLGMAKLCDDCDGAWKTFALFSFFNICDNLITSYIIDKFSTMTYTIVSCIQGPAIAIAYYFKFLAGDVVREPRLLDFVTLFGYLFGSIIYRVGNIILERKKMRNEENEDSEGELTNVDSIITQ']\n",
      "\n",
      "Exon 3452 3543 1\n",
      "Exon 3720 3989 1\n",
      "Exon 4168 4341 1\n",
      "not processed:\n",
      "type: primer_bind\n",
      "location: [4288:4323](-)\n",
      "qualifiers:\n",
      "\n",
      "Exon 4513 4646 1\n",
      "Exon 4799 4871 1\n",
      "Exon 4994 5070 1\n",
      "Exon 5166 5249 1\n",
      "Exon 5376 5427 1\n",
      "Exon 5564 5621 1\n",
      "Exon 5769 5862 1\n",
      "Exon 6055 6100 1\n",
      "Exon 6247 6302 1\n",
      "Exon 6471 6548 1\n",
      "not processed:\n",
      "type: 3'UTR\n",
      "location: [6548:7598](+)\n",
      "qualifiers:\n",
      "    Key: gene, Value: ['CRT']\n",
      "\n",
      "not processed:\n",
      "type: primer_bind\n",
      "location: [7833:7856](-)\n",
      "qualifiers:\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for feature in rec.features:\n",
    "    if feature.type == 'gene':\n",
    "        print(feature.qualifiers['gene'])\n",
    "    elif feature.type == 'exon':\n",
    "        loc = feature.location\n",
    "        print('Exon', loc.start, loc.end, loc.strand)\n",
    "    else:\n",
    "        print('not processed:\\n%s' % feature)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "source=Plasmodium falciparum (malaria parasite P. falciparum)\n",
      "topology=linear\n",
      "taxonomy=['Eukaryota', 'Alveolata', 'Apicomplexa', 'Aconoidasida', 'Haemosporida', 'Plasmodiidae', 'Plasmodium', 'Plasmodium (Laverania)']\n",
      "sequence_version=1\n",
      "organism=Plasmodium falciparum\n",
      "accessions=['KM288867']\n",
      "keywords=['']\n",
      "date=12-NOV-2014\n",
      "molecule_type=DNA\n",
      "data_file_division=INV\n",
      "references=[Reference(title='Versatile control of Plasmodium falciparum gene expression with an inducible protein-RNA interaction', ...), Reference(title='Direct Submission', ...)]\n"
     ]
    }
   ],
   "source": [
    "for name, value in rec.annotations.items():\n",
    "    print('%s=%s' % (name, value))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10000\n"
     ]
    }
   ],
   "source": [
    "print(len(rec.seq))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "25370483\n",
      "VI: 5\n",
      "DCOM: 20160112\n",
      "SO: Nat Commun. 2014 Nov 5;5:5329. doi: 10.1038/ncomms6329.\n",
      "GR: ['1DP2OD007124/OD/NIH HHS/United States', 'DP2 OD007124/OD/NIH HHS/United States', '5-T32-ES007020/ES/NIEHS NIH HHS/United States', 'T32 GM008334/GM/NIGMS NIH HHS/United States', 'T32 ES007020/ES/NIEHS NIH HHS/United States', 'P30 ES002109/ES/NIEHS NIH HHS/United States', '5-T32-GM08334/GM/NIGMS NIH HHS/United States']\n",
      "FAU: ['Goldfless, Stephen J', 'Wagner, Jeffrey C', 'Niles, Jacquin C']\n",
      "AD: Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA. Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA. Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA.\n",
      "MID: ['NIHMS630149']\n",
      "DEP: 20141105\n",
      "STAT: MEDLINE\n",
      "LR: 20161019\n",
      "RN: ['0 (Aptamers, Nucleotide)']\n",
      "CRDT: ['2014/11/06 06:00']\n",
      "SI: ['GENBANK/KM288848', 'GENBANK/KM288849', 'GENBANK/KM288850', 'GENBANK/KM288851', 'GENBANK/KM288852', 'GENBANK/KM288853', 'GENBANK/KM288854', 'GENBANK/KM288855', 'GENBANK/KM288856', 'GENBANK/KM288857', 'GENBANK/KM288858', 'GENBANK/KM288859', 'GENBANK/KM288860', 'GENBANK/KM288861', 'GENBANK/KM288862', 'GENBANK/KM288863', 'GENBANK/KM288864', 'GENBANK/KM288865', 'GENBANK/KM288866', 'GENBANK/KM288867']\n",
      "AID: ['ncomms6329 [pii]', '10.1038/ncomms6329 [doi]']\n",
      "PMID: 25370483\n",
      "LID: 10.1038/ncomms6329 [doi]\n",
      "AU: ['Goldfless SJ', 'Wagner JC', 'Niles JC']\n",
      "TI: Versatile control of Plasmodium falciparum gene expression with an inducible protein-RNA interaction.\n",
      "TA: Nat Commun\n",
      "PMC: PMC4223869\n",
      "MH: ['Aptamers, Nucleotide', 'Base Sequence', '*Gene Expression Regulation', '*Genetic Techniques', 'Molecular Sequence Data', 'Plasmodium falciparum/genetics/*metabolism']\n",
      "JT: Nature communications\n",
      "OWN: NLM\n",
      "JID: 101528555\n",
      "PST: epublish\n",
      "LA: ['eng']\n",
      "EDAT: 2014/11/06 06:00\n",
      "PL: England\n",
      "PG: 5329\n",
      "DP: 2014 Nov 5\n",
      "PHST: ['2014/04/15 00:00 [received]', '2014/09/20 00:00 [accepted]', '2014/11/06 06:00 [entrez]', '2014/11/06 06:00 [pubmed]', '2016/01/13 06:00 [medline]']\n",
      "MHDA: 2016/01/13 06:00\n",
      "PT: ['Journal Article', 'Research Support, N.I.H., Extramural', \"Research Support, Non-U.S. Gov't\"]\n",
      "SB: IM\n",
      "AB: The available tools for conditional gene expression in Plasmodium falciparum are limited. Here, to enable reliable control of target gene expression, we build a system to efficiently modulate translation. We overcame several problems associated with other approaches for regulating gene expression in P. falciparum. Specifically, our system functions predictably across several native and engineered promoter contexts, and affords control over reporter and native parasite proteins irrespective of their subcellular compartmentalization. Induction and repression of gene expression are rapid, homogeneous and stable over prolonged periods. To demonstrate practical application of our system, we used it to reveal direct links between antimalarial drugs and their native parasite molecular target. This is an important outcome given the rapid spread of resistance, and intensified efforts to efficiently discover and optimize new antimalarial drugs. Overall, the studies presented highlight the utility of our system for broadly controlling gene expression and performing functional genetics in P. falciparum.\n",
      "IS: 2041-1723 (Electronic) 2041-1723 (Linking)\n"
     ]
    }
   ],
   "source": [
    "refs = rec.annotations['references']\n",
    "for ref in refs:\n",
    "    if ref.pubmed_id != '':\n",
    "        print(ref.pubmed_id)\n",
    "        handle = Entrez.efetch(db=\"pubmed\", id=[ref.pubmed_id],\n",
    "                                rettype=\"medline\", retmode=\"text\")\n",
    "        records = Medline.parse(handle)\n",
    "        for med_rec in records:\n",
    "            for k, v in med_rec.items():\n",
    "                print('%s: %s' % (k, v))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
